package com.trytech.mongoocrawler.client.transport;

import com.trytech.mongoocrawler.client.CryptUtils;

import java.net.URL;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.concurrent.LinkedBlockingDeque;

/**
 * Base class for URL managers used by the crawler client.
 *
 * <p>Declares the queue of pending URLs and keeps a record of the MD5 digests
 * of URLs that have already been seen, so that duplicates can be detected via
 * {@link #isUnique(String)}. How URLs are actually pushed and popped is left
 * to subclasses.
 *
 * <p>NOTE(review): the original comment described this class as a
 * "Redis in-memory queue"; no Redis access is visible here (the state below is
 * held in local collections) - confirm against the concrete subclasses.
 */
public abstract class UrlManager {
    /** Marker stored for every seen digest; only the map keys carry information. */
    private static final Object PRESENT = new Object();

    /** Pending URLs, exposed to subclasses for their push/pop implementations. */
    protected LinkedBlockingDeque<URL> urlQueue = new LinkedBlockingDeque<URL>();

    /**
     * MD5 digests of the URLs already reported as unique. The map is never
     * iterated, so a plain {@code HashMap} is sufficient. It also serves as
     * the lock guarding its own reads and writes.
     */
    private final HashMap<String,Object> md5OfUrlMap = new HashMap<String,Object>();

    /**
     * Adds a single URL to this manager. Whether duplicates are filtered is up
     * to the implementation.
     *
     * @param url the URL to add, in string form
     */
    public abstract void pushUrl(String url);

    /**
     * Adds several URLs to this manager.
     *
     * @param urls the URLs to add, in string form
     */
    public abstract void pushUrls(String[] urls);

    /**
     * Removes and returns the next URL. Behavior when no URL is available
     * (blocking, returning {@code null}, ...) is defined by the implementation.
     *
     * @return the next URL
     */
    public abstract URL popUrl();

    /**
     * Checks whether a URL has been seen before and, if not, records it as seen.
     *
     * <p>De-duplication is based on the MD5 digest of the URL, so a small error
     * rate is accepted: two different URLs whose digests collide are treated
     * as duplicates.
     *
     * <p>The check and the insertion happen as one step under a lock, so when
     * several threads submit the same URL concurrently exactly one of them
     * gets {@code true}.
     *
     * @param url the URL to test
     * @return {@code true} if this is the first time the URL's digest is seen,
     *         {@code false} if it was already recorded
     */
    public boolean isUnique(String url){
        // Digest computation is kept outside the lock to keep the critical
        // section short (presumably encryptByMD5 depends only on its argument).
        String md5OfUrl = CryptUtils.encryptByMD5(url);
        synchronized (md5OfUrlMap) {
            // put() returns the previous value: null means the digest was absent
            // and has just been recorded; otherwise the non-null marker comes back.
            return md5OfUrlMap.put(md5OfUrl, PRESENT) == null;
        }
    }
}
